# Automation Setup - Configure Azure Machine Learning Compute Cluster and Managed Identity

__Notebook Version:__ 1.0<br>
__Python Version:__ Python 3.8 - AzureML<br>
__Required Packages:__ No<br>
__Platforms Supported:__  Azure Machine Learning Notebooks
     
__Data Source Required:__ No 
    
### Description
This is the first notebook in a series for setting up the Microsoft Sentinel notebook automation platform based on Azure Machine Learning Pipelines.<br>
This notebook provides step-by-step instructions to create an Azure Machine Learning compute cluster and add a user-assigned managed identity to the compute cluster.<br>
This AML compute cluster will be used as the computing power for Sentinel notebook automation.  It can be used for multiple automated notebooks.<br>
Adding a user-assigned managed identity to the compute cluster will enable scheduled notebooks to access the tenant's Azure resources.

***Please run the cells sequentially to avoid errors.  Please do not use "Run all cells".*** <br>

## Table of Contents
1. Warm-up
2. Authentication to Azure Resources
3. User-assigned managed identity
4. Azure Machine Learning Compute Cluster

## 1. Warm-up

In [None]:
# Azure Machine Learning and Pipeline SDK-specific imports
# azureml
import azureml.core
from azureml.core import Workspace, Experiment
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.datastore import Datastore

# azure common/core
from azure.common.credentials import get_azure_cli_credentials
from azure.mgmt.resource import ResourceManagementClient

# Python/ipython
import json
from IPython.display import display, HTML, Markdown

# Show which azureml-core SDK version was imported above.
print(f"SDK version: {azureml.core.VERSION}")

In [None]:
# Helper functions that are used later in this notebook
def read_config_values(file_path):
    """Load the pre-generated parameters for the Microsoft Sentinel workspace.

    Args:
        file_path: Path of a UTF-8 encoded JSON file that contains the keys
            ``tenant_id``, ``subscription_id``, ``resource_group``,
            ``workspace_id``, ``workspace_name``, ``user_alias`` and
            ``user_object_id``.

    Returns:
        tuple: The seven values, in the key order listed above.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If one of the required keys is missing.
    """
    # An open file object is always truthy, so no extra guard (and no
    # ``return None`` fallback) is needed; failures surface as exceptions.
    with open(file_path, encoding="utf-8") as json_file:
        json_config = json.load(json_file)

    required_keys = (
        "tenant_id",
        "subscription_id",
        "resource_group",
        "workspace_id",
        "workspace_name",
        "user_alias",
        "user_object_id",
    )
    return tuple(json_config[key] for key in required_keys)

def has_valid_token():
    """Check to see if there is a valid AAD token for the Azure CLI account.

    A short hint describing the outcome is printed (or rendered as Markdown
    for the expired-refresh-token case).

    Returns:
        bool: True when a token could be retrieved from the Azure CLI
        credentials, False when retrieval raised an exception.

    Note:
        Only ``Exception`` subclasses are handled.  ``KeyboardInterrupt`` and
        ``SystemExit`` propagate so that interrupting the cell really stops it
        instead of being reported as a sign-in problem.
    """
    try:
        credentials, _ = get_azure_cli_credentials()
        # NOTE(review): relies on private members of the CLI credential
        # objects; these may change between azure package versions.
        creds = credentials._get_cred(resource=None)
        # The value itself is not needed; retrieving (and indexing) it is
        # what raises when no usable token is available.
        creds._token_retriever()[2]
    except Exception as ex:
        error_text = str(ex)
        if "Please run 'az login' to setup account" in error_text:
            print("Please sign in first.")
        elif "AADSTS70043: The refresh token has expired" in error_text:
            message = "**The refresh token has expired. <br> Please continue your login process. Then: <br> 1. If you plan to run multiple notebooks on the same compute instance today, you may restart the compute instance by clicking 'Compute' on left menu, then select the instance, clicking 'Restart'; <br> 2. Otherwise, you may just restart the kernel from top menu. <br> Finally, close and re-load the notebook, then re-run cells one by one from the top.**"
            display(Markdown(message))
        elif "[Errno 2] No such file or directory: '/home/azureuser/.azure/azureProfile.json'" in error_text:
            print("Please sign in.")
        else:
            # Unknown failure: show the raw error so the user can act on it.
            print(error_text)
        return False
    else:
        print("Successfully signed in.")
        return True

In [None]:
# Populate the Microsoft Sentinel workspace parameters via read_config_values.
# config.json was generated by the system; you may edit its values, or set
# the variables below manually instead.
(
    tenant_id,
    subscription_id,
    resource_group,
    workspace_id,
    workspace_name,
    user_alias,
    user_object_id,
) = read_config_values('config.json')
print("Subscription Id: " + subscription_id)

## 2. Authentication to Azure Resources

In [None]:
# Azure CLI is used to get device code to login into Azure, you need to copy the code and open the DeviceLogin site.
# You may add [--tenant $tenant_id] to the command
if has_valid_token() == False:
    !echo -e '\e[42m'
    !az login --tenant $tenant_id --use-device-code

## 3. User-assigned Managed Identity 

In [None]:
# 1. Please enter the name of the Azure resource group in which you want to create (or look up) the user-assigned managed identity.
#    NOTE: this replaces the resource_group value loaded from config.json; the cells below use this value.
resource_group = 'myresourcegroup'

In [None]:
# 2. Please enter the name of an existing user-assigned managed identity, or the name to use for a new one.
#    The next cell creates the identity when no identity with this name is found in the resource group.
user_assigned_managed_identity = 'myuai2022'

In [None]:
# 3.  Create a new user assigned managed identity if it doesn't exist
id_list = !az identity list --subscription $subscription_id -g $resource_group

if len(id_list.grep('"name"', field=0).grep(user_assigned_managed_identity, field=1)) > 0:
    print('Found existing user-assigned managed identity.')
else:
    print('Create a new user-assigned managed identity.')
    !az identity create --subscription $subscription_id -g $resource_group -n $user_assigned_managed_identity
    new_uamiList = !az identity list --subscription $subscription_id -g $resource_group

In [None]:
# 4. Assign the user assigned managed identity a contributor access to the target resource: resource_group, as default
#  NEED TO RUN ONLY ONCE
principal_id_raw = !az identity show --subscription $subscription_id -g $resource_group --name $user_assigned_managed_identity --query principalId
uami_id_raw = !az identity show --subscription $subscription_id -g $resource_group --name $user_assigned_managed_identity --query id
principal_id = principal_id_raw[0][1:-1]
uami_id = uami_id_raw[0][1:-1]
print(uami_id)

target_resource_id = '/subscriptions/{0}/resourceGroups/{1}'.format(subscription_id, resource_group)
!az role assignment create --assignee $principal_id --role 'Contributor' --scope $target_resource_id

## 4. Azure Machine Learning Compute Cluster

In [None]:
# 1. Please enter the name of an existing compute cluster, or the name to use for a new compute cluster.
#    A later cell looks this name up in the workspace's compute targets and creates the cluster when it is not found.
amlcompute_cluster_name = 'compcl2022'

In [None]:
# 2. Get the AML workspace
# Enter the name of the current AML workspace.  The lookup uses the
# subscription_id and resource_group values set in the earlier cells.
current_aml_workspace_name = 'auto2022'
ws = Workspace.get(
    name=current_aml_workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
)
print(ws)

In [None]:
# 3. Check if this compute cluster already exists in the workspace.  If not, a new one is created.
# The user-assigned managed identity (uami_id from the role-assignment cell) is
# attached only when a new cluster is created here; an existing cluster is
# reused as-is.
cts = ws.compute_targets
found = amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute'

if found:
    print('Found existing compute target.')
    compute_target = cts[amlcompute_cluster_name]
else:
    print('Creating a new compute target...')
    # For GPU, use vm_size="STANDARD_NC6".  vm_priority='lowpriority' is optional.
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2",
        max_nodes=4,
        identity_type="UserAssigned",
        identity_id=[uami_id],
    )

    # Create the cluster.  For a more detailed view of current AmlCompute status, use get_status().
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)
    # min_nodes is not set above, so the wait must not demand a running node:
    # asking for min_node_count=1 would only end when the timeout expires.
    # None waits for the cluster's own minimum node count instead.
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=10)